product_monthly = read.csv("product_monthly.csv")
store_monthly = read.csv("store_monthly.csv")
store_prod_monthly = read.csv("store_prod_monthly.csv")
head(product_monthly)
## X date brandy cocktails gin liqueur other rum schnapps
## 1 0 2012-01-31 40220.03 50637.75 37466.92 88247.04 12774.01 172926.8 72237.21
## 2 1 2012-02-29 43110.43 42954.46 41252.70 103028.37 13963.24 198711.0 72046.88
## 3 2 2012-03-31 41327.22 54192.36 44324.61 100299.67 16715.48 206779.6 72912.93
## 4 3 2012-04-30 43424.16 103627.89 50254.82 108193.01 12392.19 214852.2 73010.12
## 5 4 2012-05-31 45033.42 95183.20 57498.30 111795.26 15949.67 255571.5 75106.14
## 6 5 2012-06-30 41014.50 112738.67 54636.41 100194.82 30200.12 262501.3 68102.11
## tequila vodka whisky total
## 1 43175.16 357925.9 336471.7 1212082
## 2 50280.09 363623.7 457594.7 1386566
## 3 50674.13 399338.4 366308.6 1352873
## 4 67537.10 425101.6 401173.7 1499567
## 5 71564.38 481024.3 500706.4 1709433
## 6 70154.15 481137.8 397346.0 1618026
dim(product_monthly)
## [1] 104 13
Total volume trend
ts_total <- ts(product_monthly$total, start=c(2012, 1), end=c(2020, 8), frequency=12)
plot(ts_total, type = 'l')#, grid.ticks.on='M')
Total volume seasonal decomposition
stl_total = stl(ts_total, s.window = "periodic")
plot(stl_total, main = "Total Volume Seasonal Decomposition")
monthplot(ts_total)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
seasonplot(ts_total)
Auto arima forecast on total volume
ts_total_train = ts(product_monthly$total[1:78], start=c(2012, 1), end=c(2018, 6), frequency=12)
ts_total_test= ts(product_monthly$total[79:104], start=c(2018, 7), end=c(2020, 8), frequency=12)
fit_total = auto.arima(ts_total_train , test = "adf", trace=T)
##
## ARIMA(2,0,2)(1,1,1)[12] with drift : 1729.126
## ARIMA(0,0,0)(0,1,0)[12] with drift : 1741.34
## ARIMA(1,0,0)(1,1,0)[12] with drift : 1739.343
## ARIMA(0,0,1)(0,1,1)[12] with drift : 1737.175
## ARIMA(0,0,0)(0,1,0)[12] : 1745.553
## ARIMA(2,0,2)(0,1,1)[12] with drift : 1726.531
## ARIMA(2,0,2)(0,1,0)[12] with drift : 1724.026
## ARIMA(2,0,2)(1,1,0)[12] with drift : 1726.531
## ARIMA(1,0,2)(0,1,0)[12] with drift : 1734.561
## ARIMA(2,0,1)(0,1,0)[12] with drift : 1726.938
## ARIMA(3,0,2)(0,1,0)[12] with drift : 1726.414
## ARIMA(2,0,3)(0,1,0)[12] with drift : 1726.402
## ARIMA(1,0,1)(0,1,0)[12] with drift : 1737.817
## ARIMA(1,0,3)(0,1,0)[12] with drift : 1732.761
## ARIMA(3,0,1)(0,1,0)[12] with drift : 1726.334
## ARIMA(3,0,3)(0,1,0)[12] with drift : 1728.973
## ARIMA(2,0,2)(0,1,0)[12] : 1742.246
##
## Best model: ARIMA(2,0,2)(0,1,0)[12] with drift
Forecast total volume
plot(forecast(fit_total, h=26))
plot(forecast(fit_total, h=26))
Evaluating Arima model on total volume
AR_total.mean <-forecast(fit_total,h=26)$mean
plot(ts_total_test, main="ARIMA Forecast vs. Actual on Total Volume 2018-07 - 2020-08", ylab="", xlab="Months", col="darkblue")
lines(AR_total.mean, col="red")
fit_total_forecast <-forecast(fit_total,h=26)
accuracy(fit_total_forecast,ts_total_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 930.6258 93626.27 67558.65 -0.02416018 4.037865 0.6441391
## Test set 46322.1079 112030.70 99824.41 2.14575657 5.249035 0.9517775
## ACF1 Theil's U
## Training set -0.02552463 NA
## Test set -0.18548215 0.3769757
Whisky trend
ts_whisky <- ts(product_monthly$whisky, start=c(2012, 1), end=c(2020, 8), frequency=12)
stl_whisky = stl(ts_whisky, s.window = "periodic")
plot(ts_whisky, type = 'l')#, grid.ticks.on='M')
Whisky Seasonal decomposition
stl_whisky = stl(ts_whisky, s.window = "periodic")
plot(stl_whisky)
ACF test
acf(ts_whisky, lag.max = 100)
pacf(ts_whisky, lag.max = 100)
Choosing the beginning 75% of the monthly data (78 months) as training, remaining 25% as test
ts_whisky_train = ts(product_monthly$whisky[1:78], start=c(2012, 1), end=c(2018, 6), frequency=12)
ts_whisky_test= ts(product_monthly$whisky[79:104], start=c(2018, 7), end=c(2020, 8), frequency=12)
fit1 = auto.arima(ts_whisky_train , test = "adf", trace=T)
##
## ARIMA(2,0,2)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,0)[12] with drift : 1594.707
## ARIMA(1,0,0)(1,1,0)[12] with drift : 1590.308
## ARIMA(0,0,1)(0,1,1)[12] with drift : 1586.574
## ARIMA(0,0,0)(0,1,0)[12] : 1607.394
## ARIMA(0,0,1)(0,1,0)[12] with drift : 1584.316
## ARIMA(0,0,1)(1,1,0)[12] with drift : 1586.575
## ARIMA(0,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(1,0,1)(0,1,0)[12] with drift : 1586.574
## ARIMA(0,0,2)(0,1,0)[12] with drift : 1586.563
## ARIMA(1,0,0)(0,1,0)[12] with drift : 1588.04
## ARIMA(1,0,2)(0,1,0)[12] with drift : 1584.776
## ARIMA(0,0,1)(0,1,0)[12] : 1609.084
##
## Best model: ARIMA(0,0,1)(0,1,0)[12] with drift
Auto Arima result with adf test
summary(fit1)
## Series: ts_whisky_train
## ARIMA(0,0,1)(0,1,0)[12] with drift
##
## Coefficients:
## ma1 drift
## -0.4625 1720.8968
## s.e. 0.1116 209.7703
##
## sigma^2 estimated as 1.454e+09: log likelihood=-788.96
## AIC=1583.93 AICc=1584.32 BIC=1590.5
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 176.0878 34535.17 26178.48 2.781573e-05 5.035107 0.6908371
## ACF1
## Training set 0.0001768834
checkresiduals(fit1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,1)(0,1,0)[12] with drift
## Q* = 41.823, df = 14, p-value = 0.0001319
##
## Model df: 2. Total lags used: 16
qqnorm(fit1$residuals)
Forecast fit1
plot(forecast(fit1, h=26))
# plot of the test set and its prediction only
AR.mean <-forecast(fit1,h=26)$mean
plot(ts_whisky_test, main="ARIMA Forecast on Whisky Volume 2018-07 - 2020-08", ylab="", xlab="Months", col="darkblue")
lines(AR.mean, col="red")
fit1_forecast <-forecast(fit1,h=26)
accuracy(fit1_forecast,ts_whisky_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 176.0878 34535.17 26178.48 2.781573e-05 5.035107 0.6908371
## Test set 3455.0143 39628.28 31624.94 1.012261e-01 5.257158 0.8345665
## ACF1 Theil's U
## Training set 0.0001768834 NA
## Test set -0.4083636345 0.3214094
Supermarket - Whisky
ts_sw <- ts(store_prod_monthly$Supermarketwhisky, start=c(2012, 1), end=c(2020, 8), frequency=12)
stl_sw = stl(ts_sw, s.window = "periodic")
plot(ts_sw, type = 'l')#, grid.ticks.on='M')
plot(stl_sw)
ts_sw_train = ts(store_prod_monthly$Supermarketwhisky[1:78], start=c(2012, 1), end=c(2018, 6), frequency=12)
ts_sw_test= ts(store_prod_monthly$Supermarketwhisky[79:104], start=c(2018, 7), end=c(2020, 8), frequency=12)
fit_sw = auto.arima(ts_sw_train, test = "adf", trace=T)
##
## ARIMA(2,0,2)(1,1,1)[12] with drift : 1591.086
## ARIMA(0,0,0)(0,1,0)[12] with drift : 1605.265
## ARIMA(1,0,0)(1,1,0)[12] with drift : 1595.382
## ARIMA(0,0,1)(0,1,1)[12] with drift : 1594.662
## ARIMA(0,0,0)(0,1,0)[12] : 1609.201
## ARIMA(2,0,2)(0,1,1)[12] with drift : 1588.656
## ARIMA(2,0,2)(0,1,0)[12] with drift : 1588.285
## ARIMA(2,0,2)(1,1,0)[12] with drift : 1588.596
## ARIMA(1,0,2)(0,1,0)[12] with drift : 1601.319
## ARIMA(2,0,1)(0,1,0)[12] with drift : 1591.127
## ARIMA(3,0,2)(0,1,0)[12] with drift : 1590.679
## ARIMA(2,0,3)(0,1,0)[12] with drift : 1590.668
## ARIMA(1,0,1)(0,1,0)[12] with drift : 1599.41
## ARIMA(1,0,3)(0,1,0)[12] with drift : 1597.111
## ARIMA(3,0,1)(0,1,0)[12] with drift : 1590.966
## ARIMA(3,0,3)(0,1,0)[12] with drift : Inf
## ARIMA(2,0,2)(0,1,0)[12] : 1597.496
##
## Best model: ARIMA(2,0,2)(0,1,0)[12] with drift
summary(fit_sw)
## Series: ts_sw_train
## ARIMA(2,0,2)(0,1,0)[12] with drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 drift
## -1.2768 -0.8669 1.0659 0.5436 1156.0819
## s.e. 0.0974 0.0926 0.1814 0.1723 311.3059
##
## sigma^2 estimated as 1.44e+09: log likelihood=-787.43
## AIC=1586.86 AICc=1588.28 BIC=1600
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 77.80647 33561.16 24946.52 -0.152588 6.928728 0.6579856
## ACF1
## Training set -0.02039364
checkresiduals(fit_sw)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,2)(0,1,0)[12] with drift
## Q* = 9.0383, df = 11, p-value = 0.6184
##
## Model df: 5. Total lags used: 16
qqnorm(fit_sw$residuals)
plot(forecast(fit_sw, h=30))
forecast(fit_sw, h=30)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jul 2018 355061.8 306426.1 403697.5 280679.9 429443.7
## Aug 2018 353555.8 303850.7 403261.0 277538.3 429573.3
## Sep 2018 396507.8 346733.2 446282.5 320384.1 472631.6
## Oct 2018 547488.0 496228.9 598747.1 469094.0 625882.1
## Nov 2018 366338.0 313367.1 419308.8 285326.1 447349.9
## Dec 2018 536970.7 483610.2 590331.2 455362.9 618578.6
## Jan 2019 295485.2 242019.0 348951.5 213715.6 377254.8
## Feb 2019 410193.8 355823.9 464563.7 327042.2 493345.4
## Mar 2019 337139.2 281912.6 392365.8 252677.4 421601.0
## Apr 2019 341960.9 286602.7 397319.0 257297.9 426623.8
## May 2019 473199.7 417729.1 528670.3 388364.7 558034.7
## Jun 2019 392484.8 336466.5 448503.2 306812.1 478157.5
## Jul 2019 363030.8 293183.9 432877.8 256209.1 469852.5
## Aug 2019 374363.1 304036.5 444689.7 266807.8 481918.4
## Sep 2019 406645.7 336315.2 476976.2 299084.4 514207.0
## Oct 2019 560118.4 489518.6 630718.2 452145.3 668091.5
## Nov 2019 385035.5 313924.7 456146.2 276280.9 493790.0
## Dec 2019 545761.2 474436.3 617086.2 436679.1 654843.3
## Jan 2020 311664.8 240339.1 382990.5 202581.6 420748.1
## Feb 2020 425527.8 354012.6 497043.0 316154.7 534900.9
## Mar 2020 347147.1 275349.5 418944.7 237342.2 456952.0
## Apr 2020 359502.0 287613.6 431390.3 249558.3 469445.7
## May 2020 485740.1 413846.0 557634.1 375787.6 595692.5
## Jun 2020 404879.3 332857.7 476900.9 294731.8 515026.8
## Jul 2020 379946.8 295575.0 464318.6 250911.2 508982.3
## Aug 2020 385632.7 300883.8 470381.6 256020.5 515244.9
## Sep 2020 421204.6 336443.1 505966.1 291573.0 550836.1
## Oct 2020 575372.6 490156.4 660588.8 445045.7 705699.5
## Nov 2020 396550.4 310754.6 482346.1 265337.1 527763.6
## Dec 2020 561447.5 475500.9 647394.1 430003.5 692891.4
575372.6 / 360
## [1] 1598.257
AR.mean.sw <-forecast(fit_sw,h=26)$mean
plot(ts_sw_test, main="ARIMA Forecast on Supermarket Whisky Volume 2018-07 - 2020-08", ylab="", xlab="Months", col="darkblue")
lines(AR.mean.sw, col="red")
fit_sw_forecast <-forecast(fit_sw,h=26)
accuracy(fit_sw_forecast,ts_sw_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 77.80647 33561.16 24946.52 -0.1525880 6.928728 0.6579856
## Test set 4764.31987 41595.13 31862.75 0.2200031 7.694372 0.8404070
## ACF1 Theil's U
## Training set -0.02039364 NA
## Test set -0.33060812 0.3361376
Supermarket + Vodka
ts_sv <- ts(store_prod_monthly$Supermarketvodka, start=c(2012, 1), end=c(2020, 8), frequency=12)
ts_sv_train = ts(store_prod_monthly$Supermarketvodka[1:78], start=c(2012, 1), end=c(2018, 6), frequency=12)
ts_sv_test= ts(store_prod_monthly$Supermarketvodka[79:104], start=c(2018, 7), end=c(2020, 8), frequency=12)
fit_sv = auto.arima(ts_sv_train, test = "adf", trace=T)
##
## ARIMA(2,0,2)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,0)[12] with drift : 1589.069
## ARIMA(1,0,0)(1,1,0)[12] with drift : 1589.727
## ARIMA(0,0,1)(0,1,1)[12] with drift : 1588.185
## ARIMA(0,0,0)(0,1,0)[12] : 1595.849
## ARIMA(0,0,1)(0,1,0)[12] with drift : 1586.219
## ARIMA(0,0,1)(1,1,0)[12] with drift : 1588.244
## ARIMA(0,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(1,0,1)(0,1,0)[12] with drift : 1588.045
## ARIMA(0,0,2)(0,1,0)[12] with drift : 1587.654
## ARIMA(1,0,0)(0,1,0)[12] with drift : 1587.735
## ARIMA(1,0,2)(0,1,0)[12] with drift : 1589.044
## ARIMA(0,0,1)(0,1,0)[12] : 1597.591
##
## Best model: ARIMA(0,0,1)(0,1,0)[12] with drift
summary(fit_sv)
## Series: ts_sv_train
## ARIMA(0,0,1)(0,1,0)[12] with drift
##
## Coefficients:
## ma1 drift
## -0.3106 1253.684
## s.e. 0.1266 271.503
##
## sigma^2 estimated as 1.499e+09: log likelihood=-789.92
## AIC=1585.83 AICc=1586.22 BIC=1592.4
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 100.0396 35073.36 26151.97 -0.7145016 7.969356 0.7530406
## ACF1
## Training set 0.02548068
checkresiduals(fit_sv)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,1)(0,1,0)[12] with drift
## Q* = 21.795, df = 14, p-value = 0.08292
##
## Model df: 2. Total lags used: 16
qqnorm(fit_sv$residuals)
AR.mean.sv <-forecast(fit_sv,h=26)$mean
plot(ts_sv_test, main="ARIMA Forecast on Supermarket Whisky Volume 2018-07 - 2020-08", ylab="", xlab="Months", col="darkblue")
lines(AR.mean.sv, col="red")
fit_sv_forecast <-forecast(fit_sv,h=26)
accuracy(fit_sv_forecast,ts_sv_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 100.0396 35073.36 26151.97 -0.7145016 7.969356 0.7530406
## Test set 9476.5216 41410.12 33582.01 1.7643123 8.137148 0.9669871
## ACF1 Theil's U
## Training set 0.02548068 NA
## Test set 0.04463531 0.6372044
Store type
head(store_monthly)
## X date X0.0 Casino Convenience.Store Drug.Store
## 1 0 2012-01-31 51305.81 5500.00 110256.2 112604.0
## 2 1 2012-02-29 55205.01 4486.75 121661.6 112396.2
## 3 2 2012-03-31 54217.05 4727.00 127321.0 112013.1
## 4 3 2012-04-30 57251.25 3896.00 132903.4 121393.0
## 5 4 2012-05-31 71418.98 4567.00 155873.8 154428.3
## 6 5 2012-06-30 65401.16 4155.00 155662.9 121631.2
## Liquor.Tobacco.Store Other.Grocery.or.Convenience Supermarket total
## 1 570069.3 70543.85 743882.7 1664162
## 2 629843.3 71590.78 937311.4 1932495
## 3 634360.6 66930.42 851741.2 1851310
## 4 677777.4 73353.89 1028583.7 2095159
## 5 754024.6 100358.39 1115182.1 2355853
## 6 649632.1 85461.18 1204114.5 2286058
ts_supermarket = ts(store_monthly$Supermarket, start=c(2012, 1), end=c(2020, 8), frequency=12)
ts_conv = ts(store_monthly$Convenience.Store, start=c(2012, 1), end=c(2020, 8), frequency=12)
ts_liquor = ts(store_monthly$Liquor.Tobacco.Store, start=c(2012, 1), end=c(2020, 8), frequency=12)
plot(ts_supermarket, type = "l")
plot(ts_conv, type = "l")
plot(ts_liquor, type = "l")